### R script for Fish 444 pop structure lab ### Charlie Waters 1/30/2014 ######################################### # 1. Set up R session ######################################### #A. #Install each of the following packages; #To minimize the chance of errors, #hit Ctrl+Enter for each line separately install.packages("BDgraph") install.packages("ape") install.packages("ade4") install.packages("adegenet") install.packages("diveRsity") install.packages("doParallel") install.packages("foreach") install.packages("genetics") install.packages("hierfstat") install.packages("iterators") install.packages("parallel")### This may not be available but it's ok. install.packages("sendplot") install.packages("xlsx") #B. Load the installed packages in to R for use #Again, hit Ctrl+Enter for each line separately #Or use R studio panel library(ape) library(ade4) library(adegenet) library(diveRsity) library(doParallel) library(foreach) library(genetics) library(hierfstat) library(iterators) library(parallel) library(sendplot) library(xlsx) library(BDgraph) # C. Set the working directory to a specific folder #just change my UWNetID to yours #and change the folder to the one you created on the Desktop setwd("C:/users/jheare/Desktop/researchproject") #D. import a Genepop file and saves it as "cod_data_genepop" #in the form of a "genind" object salmon <- read.genepop("Class_data_genepop_new.gen", missing=NA) #E. Gives a summary of the data, #including number of individuals, alleles per locus, etc. summary(salmon) ###################################################################################################### # CODE FOR PART A - TEST WHETHER THE POPULATIONS DEVIATE FROM HARDY-WEINBERG EQUILIBRIUM ###################################################################################################### ### Break up the entire data set into genind objects for each population pop_labels <- c(rep("JA",90),rep("KO",86), rep("AD",45),rep("AI",92),rep("AT",45),rep("UP",87), rep("KI",106),rep("HS",89),rep("WA",69),rep("SG",94),rep("PS",20)) ## Creates a vector containing the population assignments of each individual #numbers in code are numbers of individuals in each population ## Creates a list of genind objects for each population pops_separated <- seppop(cod_data_genepop,pop=pop_labels) names(pops_separated) #Creates a genind object comprising only the AD individuals data_AD <-pops_separated$AD #Verify that the genind object has the correct number of individuals and loci data_AD ###Repeat for all populations data_AI <-pops_separated$AI data_AT <-pops_separated$AT data_HS <-pops_separated$HS data_JA <-pops_separated$JA data_KI <-pops_separated$KI data_KO <-pops_separated$KO data_PS <-pops_separated$PS data_SG <-pops_separated$SG data_UP <-pops_separated$UP data_WA <-pops_separated$WA #### Compute observed and expected heterzygosity for #each population over all loci summary_AD <- summary(data_AD) mean(summary_AD$Hexp) mean(summary_AD$Hobs) ### Test whether Hobs and Hexp are significantly different t.test(summary_AD$Hobs,summary_AD$Hexp,paired=TRUE) summary_AI <- summary(data_AI) mean(summary_AI$Hexp) mean(summary_AI$Hobs) t.test(summary_AI$Hobs,summary_AI$Hexp,paired=TRUE) summary_AT <- summary(data_AT) mean(summary_AT$Hexp) mean(summary_AT$Hobs) t.test(summary_AT$Hobs,summary_AT$Hexp,paired=TRUE) summary_HS <- summary(data_HS) mean(summary_HS$Hexp) mean(summary_HS$Hobs) t.test(summary_HS$Hobs,summary_HS$Hexp,paired=TRUE) summary_JA <- summary(data_JA) mean(summary_JA$Hexp) mean(summary_JA$Hobs) t.test(summary_JA$Hobs,summary_JA$Hexp,paired=TRUE) summary_KI <- summary(data_KI) mean(summary_KI$Hexp) mean(summary_KI$Hobs) t.test(summary_JA$Hobs,summary_JA$Hexp,paired=TRUE) summary_KO <- summary(data_KO) mean(summary_KO$Hexp) mean(summary_KO$Hobs) t.test(summary_KO$Hobs,summary_KO$Hexp,paired=TRUE) summary_PS <- summary(data_PS) mean(summary_PS$Hexp) mean(summary_PS$Hobs) t.test(summary_PS$Hobs,summary_PS$Hexp,paired=TRUE) summary_SG <- summary(data_SG) mean(summary_SG$Hexp) mean(summary_SG$Hobs) t.test(summary_SG$Hobs,summary_SG$Hexp,paired=TRUE) summary_UP <- summary(data_UP) mean(summary_UP$Hexp) mean(summary_UP$Hobs) t.test(summary_UP$Hobs,summary_UP$Hexp,paired=TRUE) summary_WA <- summary(data_WA) mean(summary_WA$Hexp) mean(summary_WA$Hobs) t.test(summary_WA$Hobs,summary_WA$Hexp,paired=TRUE) ################################################################################################################################## ### QUESTION: ### ARE THERE ANY POPULATIONS THAT SIGNIFICANTLY DEVIATE FROM HWE? IF SO, WHICH ONES? ################################################################################################################################## ### Now test for deviations from HWE at each locus within each population #This test uses simulation to compute a pvalue for HWE HWE_test_results <- HWE.test(salmon,pop=NULL,permut=TRUE, nsim=10000,res.type="matrix") #We want to correct for multiple tests, even though the Bonferroni is conservative corrected_pval <- 0.05/(11*11) corrected_pval #Creates a table of True/False for loci that are out of HWE (TRUE=out of HWE) HWE_logical <- HWE_test_results